
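/*

This file builds the datasets used for the summary statistics and then reports them.
The dataset-construction steps are kept inside the block comment that follows, so they
do not run; the active code at the end loads the saved summary dataset and produces
the statistics for Table D1 and Table D2.
*/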

/*
* 1. environmental dataset
use "D:\Nanjing\2019\pollution2\data and do\ht_1998_2012",clear


gen a=substr(firmid_raw, 1, 8)
gen b=substr(firmid_raw, 10, 1)
gen c=a+b
drop firmid_raw a b
rename c party_id
gen year1=year

destring year1, replace force
drop year
rename year1 year

keep if year>=2000&year<=2005

rename party_id frdm
keep so2_emission year frdm
gen sample=1

save   "D:\Nanjing\2019\decomposition_data_file\code for submission\envir_so2",replace

***********************************************************************************************

*2. production dataset
use "D:\Nanjing\2019\decomposition_data_file\data_file\combine_data\2000-2005年产品数量库.dta",clear

gen prod_2=int(product_code/1000)
keep if prod_2==0|prod_2==1|prod_2==2|prod_2==3|prod_2==4|prod_2==5|prod_2==6|prod_2==7|prod_2==8|prod_2==10|prod_2==13|prod_2==15|prod_2==20|prod_2==21 ///
|prod_2==22|prod_2==23|prod_2==24|prod_2==27|prod_2==28|prod_2==31|prod_2==32|prod_2==37|prod_2==38|prod_2==41|prod_2==45


* aggregate quantity by industry in all years
bysort prod_2: egen ub_1=pctile(quantity),p(99)

bysort prod_2: egen tot_quant=sum(quantity) 
bysort prod_2: egen m_quant=mean(quantity) if quantity<ub_1
bysort prod_2: egen med_quant=median(quantity)
bysort prod_2: egen ub=pctile(quantity),p(95)
bysort prod_2: egen lb=pctile(quantity),p(5)
*bysort prod_2 year: egen ann_quant=sum(quantity)

by prod_2: egen ub_2=pctile(quantity), p(99.5)
replace tot_quant=sum(quantity) if quantity<ub_2& prod_2==7
replace tot_quant=tot_quant/1000
replace m_quant=m_quant/1000
replace med_quant=med_quant/1000 

keep prod_2 tot_quant m_quant med_quant

gen sample=2

drop if prod_2==.|prod_2==5|prod_2==8
save   "D:\Nanjing\2019\decomposition_data_file\code for submission\production",replace

*****************************************************************************************************
* 3 production+manufacturing

clear
set matsize 10000
cd D:
use "D:\Nanjing\2019\decomposition_data_file\data_file\combine_data\enviro_frdm_year.dta" 




merge m:m frdm year using "D:\Nanjing\2019\decomposition_data_file\data_file\combine_data\2000-2005年产品数量库.dta"
gen prod_2=int(product_code/1000)

keep if prod_2==0|prod_2==1|prod_2==2|prod_2==3|prod_2==4|prod_2==5|prod_2==6|prod_2==7|prod_2==8|prod_2==10|prod_2==13|prod_2==15|prod_2==20|prod_2==21 ///
|prod_2==22|prod_2==23|prod_2==24|prod_2==27|prod_2==28|prod_2==31|prod_2==32|prod_2==37|prod_2==38|prod_2==41|prod_2==45
bysort prod_2: egen ub_1=pctile(quantity),p(99)
by prod_2: egen ub_2=pctile(quantity), p(99.5)

keep if _merge==3
drop _merge

*duplicates drop frdm year, force



* aggregate quantity by industry in all years

* aggregate quantity by industry in all years
*bysort prod_2: egen ub_1=pctile(quantity),p(99)



bysort prod_2: egen tot_quant=sum(quantity)
bysort prod_2: egen m_quant=mean(quantity) if quantity<ub_1
bysort prod_2: egen med_quant=median(quantity)
bysort prod_2: egen ub=pctile(quantity),p(95)
bysort prod_2: egen lb=pctile(quantity),p(5)
*bysort prod_2 year: egen ann_quant=sum(quantity)
replace tot_quant=sum(quantity) if quantity<ub_2& prod_2==7

replace tot_quant=tot_quant/1000
replace m_quant=m_quant/1000
replace med_quant=med_quant/1000

drop if prod_2==5|prod_2==8

gen mean_r=457797
gen median_r=41255
gen sd_r=2553178

gen mean_k=357577
gen median_k=20868.39
gen sd_k= 2499673


gen mean_l=1542.931
gen median_l=306
gen sd_l=6726.919

gen mean_m=342659
gen median_m=30216
gen sd_m=1906517

gen mean_so2=252386.5
gen median_so2=8000
gen sd_so2=2453999



keep prod_2 tot_quant m_quant med_quant mean_r median_r sd_r mean_k median_k sd_k mean_l median_l sd_l mean_m median_m sd_m mean_so2 median_so2 sd_so2
gen sample=4




save  "D:\Nanjing\2019\decomposition_data_file\code for submission\match1",replace

bysort prod_2: sum tot_quant
bysort prod_2: sum m_quant

bysort prod_2: sum med_quant




**************************************************************





use "D:\Nanjing\2019\industry_data\1998-2007主要指标+capital+tfp",clear
keep if year>=2000&year<=2005
keep frdm year totaloutput aemployment totalintermediate real_cap
gen sample=3
save  "D:\Nanjing\2019\decomposition_data_file\code for submission\ind",replace


************************************************************************************************





use "D:\Nanjing\2019\decomposition_data_file\code for submission\envir_so2",clear
append using  "D:\Nanjing\2019\decomposition_data_file\code for submission\production"
append using  "D:\Nanjing\2019\decomposition_data_file\code for submission\ind"
append using  "D:\Nanjing\2019\decomposition_data_file\code for submission\match1"


save "D:\Nanjing\2019\decomposition_final\data\summary",replace
***********************************************************************************************************************

*/



**********************************Table D1**********************************************************


**********************************Table D1**********************************************************

**********************************Table D1**********************************************************
* Table D1: summary statistics computed from the stacked summary dataset.
* Rows are selected with the sample variable: sample==1 for so2_emission and
* sample==3 for the manufacturing survey variables.
use "D:\Nanjing\2019\decomposition_final\data\summary",clear


*-----------------------------------------------
* Columns 1-3 of Table D1: environmental dataset
*-----------------------------------------------
* The plain call gives N, mean, sd, min and max; the detail call adds the
* percentiles, including the median (50th percentile).
sum so2_emission if sample==1
sum so2_emission if sample==1,d


*-------------------------------------------------------
* Columns 4-6 of Table D1: manufacturing survey
*-------------------------------------------------------
sum totaloutput real_cap aemployment totalintermediate if sample==3
* The plain call above reports no median. The detail call is added so that the
* median is available for these variables in the same way as for so2_emission.
summarize totaloutput real_cap aemployment totalintermediate if sample==3, detail



*-------------------------------------------------------------
* Columns 7-9 of Table D1: matched sample
*-------------------------------------------------------------
* No sample filter is applied here. NOTE(review): per the construction steps
* kept in the block comment earlier in this file, these variables are only
* kept in the matched dataset (sample==4) and each holds one fixed value --
* confirm against the saved summary dataset.
sum mean_r median_r sd_r
sum mean_k median_k sd_k
sum mean_l median_l sd_l

sum mean_m median_m sd_m
sum mean_so2 median_so2 sd_so2









**********************************Table D2**********************************************************

**********************************Table D2**********************************************************

**********************************Table D2**********************************************************


* Table D2: by-industry (prod_2) summaries of the quantity statistics, taken
* from the stacked summary dataset. The same three variables are summarized
* for the production rows (sample==2) and then the matched rows (sample==4).
use "D:\Nanjing\2019\decomposition_final\data\summary", clear

local quantity_stats tot_quant m_quant med_quant

*-----------------------------------------------
* Columns 1-3 of Table D2: production set
*-----------------------------------------------
foreach stat of local quantity_stats {
    bysort prod_2: summarize `stat' if sample==2
}

*-----------------------------------------------
* Columns 4-6 of Table D2: matched sample
*-----------------------------------------------
foreach stat of local quantity_stats {
    bysort prod_2: summarize `stat' if sample==4
}


**************************************












